notebook.community

Edit and run



In [7]:

    
%matplotlib inline
import matplotlib.pyplot as plt



In [9]:

    
xlow = 0
xhigh = np.pi/2
x = np.linspace(xlow, xhigh, num = 100)
y = np.cos(x)



In [10]:

    
plt.figure()
plt.plot(x, y)
plt.show()



In [18]:

    
dim = 100
x_mcmc = np.random.random(dim) * (xhigh - xlow) + xlow
y_mcmc = np.cos(x_mcmc)



In [19]:

    
plt.figure()
plt.plot(x_mcmc, y_mcmc, 'o')
plt.show()



In [21]:

    
y_mcmc.mean() * (xhigh - xlow)









    Out[21]:





0.99904262914854747



In [ ]:

    
(xhigh - xlow) * np.sqrt(y_mcmc.



In [1]:

    
from numpy.random import random

Metropolis-Hastings Algorithm

the following code is from http://code.activestate.com/recipes/414200-metropolis-hastings-sampler/



In [23]:

    
def sdnorm(z):
    """
    Standard normal pdf (Probability Density Function)
    """
    return np.exp(-z*z/2.)/np.sqrt(2*np.pi)

n = 10000
alpha = 1
x = 0.
vec = []
vec.append(x)
innov = random(n) * 2 * alpha - alpha #random inovation, uniform proposal distribution
for i in xrange(1,n):
    can = x + innov[i] #candidate
    aprob = np.min([1.,sdnorm(can)/sdnorm(x)]) #acceptance probability
    u = random(1)
    if u[0] < aprob:
        x = can
        vec.append(x)



In [24]:

    
len(vec)









    Out[24]:





8066



In [25]:

    
#plotting the results:
#theoretical curve
x = np.arange(-3,3,.1)
y = sdnorm(x)
plt.subplot(211)
plt.title('Metropolis-Hastings')
plt.plot(vec)
plt.subplot(212)

plt.hist(vec, bins=30,normed=1)
plt.plot(x,y,'ro')
plt.ylabel('Frequency')
plt.xlabel('x')
plt.legend(('PDF','Samples'))
plt.show()

my version of the code



In [41]:

    
def metropolis_hastings(f, init = 0, nsamples = 1000):
    alpha = 1
    sample_count = 0
    x = init
    vec = []
    vec.append(x)
    while sample_count < nsamples-1:
        can = x + random(1)[0] * 2 * alpha - alpha #candidate
        aprob = np.min([1.,sdnorm(can)/sdnorm(x)]) #acceptance probability
        u = random(1)
        if u[0] < aprob:
            x = can
            vec.append(x)
            sample_count+=1
    return np.array(vec)



In [42]:

    
vec = metropolis_hastings(sdnorm, nsamples=10000)



In [43]:

    
len(vec)









    Out[43]:





10000



In [44]:

    
#plotting the results:
#theoretical curve
x = np.arange(-3,3,.1)
y = sdnorm(x)
plt.subplot(211)
plt.title('Metropolis-Hastings')
plt.plot(vec)
plt.subplot(212)

plt.hist(vec, bins=30,normed=1)
plt.plot(x,y,'ro')
plt.ylabel('Frequency')
plt.xlabel('x')
plt.legend(('PDF','Samples'))
plt.show()



In [45]:

    
def gauss(z):
    """
    Standard normal gaussian
    """
    return np.exp(-z*z/2.)



In [48]:

    
import scipy.integrate as integrate



In [50]:

    
integrate.quad(gauss, -np.Inf, np.Inf)









    Out[50]:





(2.5066282746309994, 2.5512942192316024e-08)



In [51]:

    
np.sqrt(2*np.pi)









    Out[51]:





2.5066282746310002



In [167]:

    
def triangle(z):
    if (type(z) is not type([1])) and (type(z) is not type(np.array(1))):
        x = np.array([z])
    else:
        x = np.array(z)
    return np.piecewise(x, [x < 0, (x < 10) & (x > 0), (x >= 10) & (x <= 20), x > 20], [0, lambda x: x, lambda x: - x + 20, 0])



In [168]:

    
triangle([1])









    Out[168]:





array([1])



In [169]:

    
x = np.arange(-100,100,.1)
plt.figure()
plt.plot(x, triangle(x))
plt.show()



In [170]:

    
type(np.array([1]))









    Out[170]:





numpy.ndarray



In [174]:

    
integrate.quad(triangle, -np.Inf, np.Inf)









    Out[174]:





(100.00000004456021, 8.648601692584634e-07)



In [178]:

    
vec = metropolis_hastings(lambda f: 1/100*triangle(x), nsamples=10000)



In [181]:

    
x = np.arange(0,100,.1)
y = 1/100 * triangle(x)
plt.subplot(211)
plt.title('Metropolis-Hastings')
plt.plot(vec)
plt.subplot(212)

plt.hist(vec, bins=30,normed=1)
plt.plot(x,y,'ro')
plt.ylabel('Frequency')
plt.xlabel('x')
plt.xlim((-30,30))
plt.legend(('PDF','Samples'))
plt.show()



In [184]:

    
import pymc



In [185]:

    
@pymc.stochastic(dtype=int)
def switchpoint(value=1900, t_l=1851, t_h=1962):
    """The switchpoint for the rate of disaster occurrence."""
    if value > t_h or value < t_l:
        # Invalid values
        return -np.inf
    else:
        # Uniform log-likelihood
        return -np.log(t_h - t_l + 1)



In [189]:

    
from scipy.stats import rv_discrete
from scipy import stats
normdiscrete = stats.rv_discrete(values=(gridint, np.round(probs, decimals=7)), name='normdiscrete')
n_sample = 500
np.random.seed(87655678)   # fix the seed for replicability
rvs = normdiscrete.rvs(size=n_sample)
rvsnd = rvs
f, l = np.histogram(rvs, bins=gridlimits)
sfreq = np.vstack([gridint, f, probs*n_sample]).T
print sfreq









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-189-b50054185a3f> in <module>()
      1 from scipy.stats import rv_discrete
      2 from scipy import stats
----> 3 normdiscrete = stats.rv_discrete(values=(gridint, np.round(probs, decimals=7)), name='normdiscrete')
      4 n_sample = 500
      5 np.random.seed(87655678)   # fix the seed for replicability

NameError: name 'gridint' is not defined



In [219]:

    
from scipy import stats
class deterministic_gen(stats.rv_continuous):
     def _pdf(self, x):
        return 1/x



In [220]:

    
deterministic = deterministic_gen(name="deterministic")



In [222]:

    
deterministic.cdf(np.arange(1, 3, 0.5))









    



---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
<ipython-input-222-48cd819759f9> in <module>()
----> 1 deterministic.cdf(np.arange(1, 3, 0.5))

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in cdf(self, x, *args, **kwds)
   1342         if any(cond):  # call only if at least 1 entry
   1343             goodargs = argsreduce(cond, *((x,)+args))
-> 1344             place(output,cond,self._cdf(*goodargs))
   1345         if output.ndim == 0:
   1346             return output[()]

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _cdf(self, x, *args)
   1199 
   1200     def _cdf(self, x, *args):
-> 1201         return self.veccdf(x,*args)
   1202 
   1203     def _logcdf(self, x, *args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in __call__(self, *args, **kwargs)
   1570             vargs.extend([kwargs[_n] for _n in names])
   1571 
-> 1572         return self._vectorize_call(func=func, args=vargs)
   1573 
   1574     def _get_ufunc_and_otypes(self, func, args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in _vectorize_call(self, func, args)
   1636                       for _a in args]
   1637 
-> 1638             outputs = ufunc(*inputs)
   1639 
   1640             if ufunc.nout == 1:

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _cdf_single_call(self, x, *args)
   1196 
   1197     def _cdf_single_call(self, x, *args):
-> 1198         return integrate.quad(self._pdf, self.a, x, args=args)[0]
   1199 
   1200     def _cdf(self, x, *args):

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/integrate/quadpack.pyc in quad(func, a, b, args, full_output, epsabs, epsrel, limit, points, weight, wvar, wopts, maxp1, limlst)
    252         args = (args,)
    253     if (weight is None):
--> 254         retval = _quad(func,a,b,args,full_output,epsabs,epsrel,limit,points)
    255     else:
    256         retval = _quad_weight(func,a,b,args,full_output,epsabs,epsrel,limlst,limit,maxp1,weight,wvar,wopts)

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/integrate/quadpack.pyc in _quad(func, a, b, args, full_output, epsabs, epsrel, limit, points)
    319             return _quadpack._qagse(func,a,b,args,full_output,epsabs,epsrel,limit)
    320         else:
--> 321             return _quadpack._qagie(func,bound,infbounds,args,full_output,epsabs,epsrel,limit)
    322     else:
    323         if infbounds != 0:

<ipython-input-219-9d04ce8ef423> in _pdf(self, x)
      2 class deterministic_gen(stats.rv_continuous):
      3      def _pdf(self, x):
----> 4         return 1/x

ZeroDivisionError: float division by zero



In [223]:

    
deterministic.pdf(np.arange(-3, 3, 0.5))









    



-c:4: RuntimeWarning: divide by zero encountered in divide






    Out[223]:





array([-0.33333333, -0.4       , -0.5       , -0.66666667, -1.        ,
       -2.        ,         inf,  2.        ,  1.        ,  0.66666667,
        0.5       ,  0.4       ])



In [224]:

    
deterministic.rvs(size=10)









    



---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-224-eafdfe3a1ee3> in <module>()
----> 1 deterministic.rvs(size=10)

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in rvs(self, *args, **kwds)
    669             return loc*ones(size, 'd')
    670 
--> 671         vals = self._rvs(*args)
    672         if self._size is not None:
    673             vals = reshape(vals, size)

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _rvs(self, *args)
   1192         ## Use basic inverse cdf algorithm for RV generation as default.
   1193         U = mtrand.sample(self._size)
-> 1194         Y = self._ppf(U,*args)
   1195         return Y
   1196 

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _ppf(self, q, *args)
   1211 
   1212     def _ppf(self, q, *args):
-> 1213         return self.vecfunc(q,*args)
   1214 
   1215     def _isf(self, q, *args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in __call__(self, *args, **kwargs)
   1570             vargs.extend([kwargs[_n] for _n in names])
   1571 
-> 1572         return self._vectorize_call(func=func, args=vargs)
   1573 
   1574     def _get_ufunc_and_otypes(self, func, args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in _vectorize_call(self, func, args)
   1636                       for _a in args]
   1637 
-> 1638             outputs = ufunc(*inputs)
   1639 
   1640             if ufunc.nout == 1:

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _ppf_single_call(self, q, *args)
   1154 
   1155         return optimize.brentq(self._ppf_to_solve,
-> 1156                                left, right, args=(q,)+args, xtol=self.xtol)
   1157 
   1158     # moment from definition

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/optimize/zeros.pyc in brentq(f, a, b, args, xtol, rtol, maxiter, full_output, disp)
    413     if rtol < _rtol:
    414         raise ValueError("rtol too small (%g < %g)" % (rtol, _rtol))
--> 415     r = _zeros._brentq(f,a,b,xtol,rtol,maxiter,args,full_output,disp)
    416     return results_c(full_output, r)
    417 

RuntimeError: Failed to converge after 100 iterations.



In [195]:

    
deterministic?



In [239]:

    
def getDistribution(data):
    kernel = stats.gaussian_kde(data)
    class rv(stats.rv_continuous):
        def _rvs(self, *x, **y):
            # don't ask me why it's using self._size 
            # nor why I have to cast to int
            return kernel.resample(int(self._size)) 
        def _cdf(self, x):
            return kernel.integrate_box_1d(-numpy.Inf, x)
        def _pdf(self, x):
            return kernel.evaluate(x)
    return rv(name='kdedist')



In [240]:

    
f = getDistribution(np.random.randn(100))



In [243]:

    
f.rvs([1])









    



---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-243-d05ef689c9d3> in <module>()
----> 1 f.rvs([1])

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in rvs(self, *args, **kwds)
    669             return loc*ones(size, 'd')
    670 
--> 671         vals = self._rvs(*args)
    672         if self._size is not None:
    673             vals = reshape(vals, size)

<ipython-input-239-8854fe964a63> in _rvs(self, *x, **y)
      5             # don't ask me why it's using self._size
      6             # nor why I have to cast to int
----> 7             return kernel.resample(int(self._size))
      8         def _cdf(self, x):
      9             return kernel.integrate_box_1d(-numpy.Inf, x)

TypeError: int() argument must be a string or a number, not 'NoneType'



In [231]:

    
def getDistribution(data):
    kernel = stats.gaussian_kde(data)
    class rv(stats.rv_continuous):
        def _cdf(self, x):
            return kernel.integrate_box_1d(-numpy.Inf, x)
        def _pdf(self, x):
            return kernel.evaluate(x)
    return rv(name='kdedist')









    Out[231]:





array([ 0.21934769])



In [244]:

    
kernel = stats.gaussian_kde(np.random.randn(100))



In [245]:

    
kernel









    Out[245]:





<scipy.stats.kde.gaussian_kde at 0x10b0f2a90>



In [246]:

    
type(kernel)









    Out[246]:





scipy.stats.kde.gaussian_kde



In [247]:

    
kernel.resample(10)









    Out[247]:





array([[ 0.73375879, -1.45096952,  0.50167046, -0.27989526, -0.5990068 ,
        -0.35663271, -2.82007892,  0.59557902, -0.71221009,  0.40275279]])



In [256]:

    
def getDistribution():
    f = np.random.normal
    class rv(stats.rv_continuous):
        def _pdf(self, x):
            return f(x)
    return rv(name='kdedist')



In [257]:

    
a = getDistribution()



In [262]:

    
a.rvs(size=10)









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-262-a96fcc4f0fde> in <module>()
----> 1 a.rvs(size=10)

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in rvs(self, *args, **kwds)
    669             return loc*ones(size, 'd')
    670 
--> 671         vals = self._rvs(*args)
    672         if self._size is not None:
    673             vals = reshape(vals, size)

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _rvs(self, *args)
   1192         ## Use basic inverse cdf algorithm for RV generation as default.
   1193         U = mtrand.sample(self._size)
-> 1194         Y = self._ppf(U,*args)
   1195         return Y
   1196 

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _ppf(self, q, *args)
   1211 
   1212     def _ppf(self, q, *args):
-> 1213         return self.vecfunc(q,*args)
   1214 
   1215     def _isf(self, q, *args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in __call__(self, *args, **kwargs)
   1570             vargs.extend([kwargs[_n] for _n in names])
   1571 
-> 1572         return self._vectorize_call(func=func, args=vargs)
   1573 
   1574     def _get_ufunc_and_otypes(self, func, args):

/Users/schriste/anaconda/lib/python2.7/site-packages/numpy/lib/function_base.pyc in _vectorize_call(self, func, args)
   1636                       for _a in args]
   1637 
-> 1638             outputs = ufunc(*inputs)
   1639 
   1640             if ufunc.nout == 1:

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/stats/distributions.pyc in _ppf_single_call(self, q, *args)
   1154 
   1155         return optimize.brentq(self._ppf_to_solve,
-> 1156                                left, right, args=(q,)+args, xtol=self.xtol)
   1157 
   1158     # moment from definition

/Users/schriste/anaconda/lib/python2.7/site-packages/scipy/optimize/zeros.pyc in brentq(f, a, b, args, xtol, rtol, maxiter, full_output, disp)
    413     if rtol < _rtol:
    414         raise ValueError("rtol too small (%g < %g)" % (rtol, _rtol))
--> 415     r = _zeros._brentq(f,a,b,xtol,rtol,maxiter,args,full_output,disp)
    416     return results_c(full_output, r)
    417 

ValueError: f(a) and f(b) must have different signs



In [264]:

    
import scipy.integrate as integrate
result = integrate.quad(lambda x: np.random.normal(x), 0, 4.5)